# Trabajo Complementos - 31 enero 2016 - Análisis de la correlación entre las variables (Pobl, Natalidad, EsperanzaVida, Mortalidad).

# Author: Caio Fernandes Moreno <caiofern@ucm.es | caiomsouza@gmail.com>

# Load the per-country dataset and preview its first ten rows.
#
# NOTE(review): setwd() with an absolute, machine-specific path makes this
# script non-portable. It is kept so the CSV is resolved exactly as before;
# consider running the script from the project directory instead.
setwd("/Users/caiomsouza/git/Bitbucket/ucm/COMPLEMENTOS_DE_FORMACION_EN_TECNICAS_DE_MINERIA_DE_DATOS/tareas-entregar/trabajo-31enero16")

# `header` is written out in full: the previous `head = TRUE` only worked
# through partial argument matching.
# NOTE(review): the "\x87" in the recorded output below suggests the CSV is
# not UTF-8 encoded — confirm the encoding and consider passing fileEncoding.
paises <- read.csv(file = "DatosPaises.csv", header = TRUE, sep = ",")
head(paises, 10)
##              Pais  Pobl Natalidad EsperanzaVida Mortalidad
## 1   Afganist\x87n 27963      35.6          59.8        8.6
## 2         Albania  2902      13.1          77.5        7.2
## 3        Alemania 80435       8.3          80.7       10.8
## 4          Angola 21220      46.2          51.7       14.2
## 5  Arabia Saudita 28091      20.8          74.1        3.4
## 6         Argelia 36036      25.1          74.4        5.1
## 7       Argentina 41223      17.8          76.0        7.6
## 8         Armenia  2963      13.3          74.6        9.0
## 9       Australia 22163      13.5          82.1        6.7
## 10        Austria  8392       9.5          81.1        9.4
##    BalanzaComercial   PIB ProdCereales
## 1             -4766   566    157.13532
## 2             -2861  3786    577.68526
## 3            205408 41100   2659.28619
## 4             29864  4221     19.54153
## 5            144283 19327     10.28894
## 6             17558  4350    113.11257
## 7             12057 11508    310.53185
## 8             -2771  3125    191.30778
## 9             10724 57593     82.63341
## 10            -5712 46377   1593.54430
# Keep only the demographic variables (Pobl, Natalidad, EsperanzaVida,
# Mortalidad): the country label and the three economic indicators are
# discarded in a single step by masking their names out of the data frame.
paises.valores <- paises[
  !(names(paises) %in% c("Pais", "BalanzaComercial", "PIB", "ProdCereales"))
]

# Preview the reduced table.
head(paises.valores, n = 10)
##     Pobl Natalidad EsperanzaVida Mortalidad
## 1  27963      35.6          59.8        8.6
## 2   2902      13.1          77.5        7.2
## 3  80435       8.3          80.7       10.8
## 4  21220      46.2          51.7       14.2
## 5  28091      20.8          74.1        3.4
## 6  36036      25.1          74.4        5.1
## 7  41223      17.8          76.0        7.6
## 8   2963      13.3          74.6        9.0
## 9  22163      13.5          82.1        6.7
## 10  8392       9.5          81.1        9.4

# Confirm which columns remain.
colnames(paises.valores)
## [1] "Pobl"          "Natalidad"     "EsperanzaVida" "Mortalidad"
# Standardise the variables with scale() (default centring and scaling of
# each column) and preview the result.
paises.valores.normalizar <- scale(paises.valores)
head(paises.valores.normalizar, 10)
##              Pobl  Natalidad EsperanzaVida  Mortalidad
##  [1,] -0.03332065  1.2692333    -1.2643593  0.07557593
##  [2,] -0.64060228 -0.8127601     0.7680260 -0.41474593
##  [3,]  1.23818813 -1.2569187     1.1354628  0.84608170
##  [4,] -0.19671796  2.2500835    -2.1944339  2.03686334
##  [5,] -0.03021893 -0.1002557     0.3776243 -1.74561954
##  [6,]  0.16230541  0.2976364     0.4120715 -1.15022871
##  [7,]  0.28799751 -0.3778548     0.5957899 -0.27465397
##  [8,] -0.63912412 -0.7942535     0.4350363  0.21566788
##  [9,] -0.17386705 -0.7757469     1.2962165 -0.58986088
## [10,] -0.50756783 -1.1458791     1.1813925  0.35575984

# Pairwise correlation matrix of the standardised variables.
paises.cor <- cor(paises.valores.normalizar)
# View(paises.cor)
paises.cor
##                      Pobl   Natalidad EsperanzaVida  Mortalidad
## Pobl           1.00000000 -0.03243038   -0.01445153 -0.03519014
## Natalidad     -0.03243038  1.00000000   -0.87063840  0.06833273
## EsperanzaVida -0.01445153 -0.87063840    1.00000000 -0.38664292
## Mortalidad    -0.03519014  0.06833273   -0.38664292  1.00000000

# Written conclusions. cat() emits no line break on its own, so each message
# ends with "\n"; without it the two sentences are glued together on a single
# line when the file is run as a plain script.
cat("Se puede ver una correlacion muy alta entre EsperanzaVida y Natalidad de -0.87063840\n")
## Se puede ver una correlacion muy alta entre EsperanzaVida y Natalidad de -0.87063840
cat("Se percibe que cuanto mayor la Esperanza de Vida menos niños en un pais.\n")
## Se percibe que cuanto mayor la Esperanza de Vida menos niños en un pais.
# Use the corrplot library to visualise the correlations between the
# variables more clearly.
# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html

library(corrplot)
M <- cor(paises.valores.normalizar)

# One plot per glyph style.
for (metodo in c("circle", "square", "ellipse", "number", "shade", "color",
                 "pie")) {
  corrplot(M, method = metodo)
}

# Only the upper, then only the lower, triangle.
for (triangulo in c("upper", "lower")) {
  corrplot(M, type = triangulo)
}

# Mixed layouts: a different glyph in each triangle.
corrplot.mixed(M)
corrplot.mixed(M, lower = "ellipse", upper = "circle")
corrplot.mixed(M, lower = "square", upper = "circle")

# Different orderings of the variables.
for (orden in c("AOE", "hclust", "FPC", "alphabet")) {
  corrplot(M, order = orden)
}

# Hierarchical-clustering order with rectangles drawn around 2 and 3 clusters.
for (grupos in c(2, 3)) {
  corrplot(M, order = "hclust", addrect = grupos)
}

# Colour-ramp generators used by the plots below.
col1 <- colorRampPalette(c(
  "#7F0000", "red", "#FF7F00", "yellow", "white",
  "cyan", "#007FFF", "blue", "#00007F"
))
col2 <- colorRampPalette(c(
  "#67001F", "#B2182B", "#D6604D", "#F4A582", "#FDDBC7", "#FFFFFF",
  "#D1E5F0", "#92C5DE", "#4393C3", "#2166AC", "#053061"
))
col3 <- colorRampPalette(c("red", "white", "blue"))
col4 <- colorRampPalette(c(
  "#7F0000", "red", "#FF7F00", "yellow", "#7FFF7F",
  "cyan", "#007FFF", "blue", "#00007F"
))
wb <- c("white", "black")

## The same clustered plot drawn with each colour spectrum in turn.
for (paleta in list(col1(100), col2(50), col3(20), col4(10))) {
  corrplot(M, order = "hclust", addrect = 2, col = paleta)
}

## Two-colour scheme on a gold background.
corrplot(M, order = "hclust", addrect = 2, col = wb, bg = "gold2")

## No colour legend and no text labels.
corrplot(M, order = "AOE", cl.pos = "n", tl.pos = "n")

## Colour legend at the bottom, labels on the diagonal, rotated label text.
corrplot(M, order = "AOE", cl.pos = "b", tl.pos = "d", tl.srt = 60)

## A wider colour legend with right-aligned numbers.
corrplot(M, order = "AOE", cl.ratio = 0.2, cl.align = "r")

## Absolute correlations with the legend limited to [0, 1].
# NOTE(review): newer corrplot releases appear to have renamed `cl.lim` to
# `col.lim` — confirm against the installed version.
corrplot(abs(M), order = "AOE", col = col3(200), cl.lim = c(0, 1))

## Plot a general (non-correlation) matrix: 15 x 15 rounded uniform draws
## from [-100, 100].
ran <- round(matrix(runif(15 * 15, min = -100, max = 100), nrow = 15))
corrplot(ran, method = "square", is.corr = FALSE)

## Same matrix as ellipses, with the colour legend spanning the full range.
corrplot(ran, method = "ellipse", is.corr = FALSE, cl.lim = c(-100, 100))

# Pairwise correlation tests between the columns of a matrix.
#
# Arguments:
#   mat         Matrix or data frame; each column is one variable.
#   conf.level  Confidence level forwarded to cor.test().
#
# Returns a list of three n x n matrices (n = number of columns), in this
# order: p-values, lower confidence bounds, upper confidence bounds. The
# elements are also named `p`, `lowCI` and `uppCI`; positional access
# (`[[1]]`, `[[2]]`, `[[3]]`) keeps working. The diagonals are 0 for the
# p-values and 1 for both bounds.
cor.mtest <- function(mat, conf.level = 0.95) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
  diag(p.mat) <- 0
  diag(lowCI.mat) <- diag(uppCI.mat) <- 1
  # seq_len() yields an empty sequence for a single-column input, whereas
  # 1:(n - 1) would iterate over c(1, 0) and index a non-existent column.
  for (i in seq_len(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], conf.level = conf.level)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
      # cor.test() returns no confidence interval when there are too few
      # observations; record NA bounds instead of failing on the assignment.
      ci <- tmp$conf.int
      if (is.null(ci)) {
        ci <- c(NA_real_, NA_real_)
      }
      lowCI.mat[i, j] <- lowCI.mat[j, i] <- ci[1]
      uppCI.mat[i, j] <- uppCI.mat[j, i] <- ci[2]
    }
  }
  list(p = p.mat, lowCI = lowCI.mat, uppCI = uppCI.mat)
}

# Significance tests for the correlations displayed in M. They are computed
# from the same data M was built from: the previous version tested the
# unrelated 11-column `mtcars` dataset, so the p-value and confidence-interval
# matrices did not correspond to M's four variables.
res1 <- cor.mtest(paises.valores.normalizar, 0.95)
res2 <- cor.mtest(paises.valores.normalizar, 0.99)

## Mark the non-significant coefficients at several significance levels.
corrplot(M, p.mat = res1[[1]], sig.level = 0.2)

corrplot(M, p.mat = res1[[1]], sig.level = 0.05)

corrplot(M, p.mat = res1[[1]], sig.level = 0.01)

## Leave non-significant coefficients blank.
corrplot(M, p.mat = res1[[1]], insig = "blank")

## Print the p-value on non-significant coefficients.
corrplot(M, p.mat = res1[[1]], insig = "p-value")

## Print every p-value (a negative sig.level makes all cells "insignificant").
corrplot(M, p.mat = res1[[1]], insig = "p-value", sig.level = -1)

## Cross out non-significant coefficients.
corrplot(M, p.mat = res1[[1]], order = "hclust", insig = "pch", addrect = 3)

## Plot the 0.95 confidence intervals with the 'rect' method. Argument names
## are spelled out in full instead of relying on partial matching.
corrplot(M,
  lowCI.mat = res1[[2]], uppCI.mat = res1[[3]], order = "hclust",
  rect.col = "navy", plotCI = "rect", cl.pos = "n"
)

corrplot(M,
  p.mat = res1[[1]], lowCI.mat = res1[[2]], uppCI.mat = res1[[3]],
  order = "hclust", pch.col = "red", sig.level = 0.01, addrect = 3,
  rect.col = "navy", plotCI = "rect", cl.pos = "n"
)

## Sweep alpha from 0.1 down to 0, redrawing the intervals at confidence
## level 1 - alpha and titling each plot with the current alpha.
for (i in seq(0.1, 0, -0.005)) {
  tmp <- cor.mtest(paises.valores.normalizar, 1 - i)
  corrplot(M,
    p.mat = tmp[[1]], lowCI.mat = tmp[[2]], uppCI.mat = tmp[[3]],
    order = "hclust", pch.col = "red", sig.level = i, plotCI = "rect",
    cl.pos = "n", mar = c(0, 0, 1, 0),
    title = substitute(
      alpha == x,
      list(x = format(i, digits = 3, nsmall = 3))
    )
  )
}